# rm(list=ls(all=TRUE))



###  will run as source file-- remove wd, clear environment, and packages
### add description

# library(tidyverse)
# library(scales)
# 
# setwd("~/Dropbox/Mayors Project (PAK)/Business_Mayors") 
# source("~/Dropbox/Mayors Project (PAK)/common data/code/mayors_source.R")


## 2000 census data for cities of 50K+
load("replication_data_2000_census.RData")

## mayoral candidate data
load("replication_data_mayoral_candidates.RData")


## city summary statistics for Table 1
means_all <- 
  city_census_data_2000 %>% 
  summarise(sample = "all",
            statistic = "mean",
            count = n(),
            population = mean(population, na.rm=TRUE),
            pct_white = mean(pctwhite, na.rm=TRUE),
            unemp = mean(unemp, na.rm=TRUE),
            med_hh_inc = mean(medhhinc, na.rm=TRUE),
            home_own = mean(homeown, na.rm=TRUE),
            med_house_val = mean(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = count:med_house_val, names_to = "variable", values_to = "value")


sds_all <- 
  city_census_data_2000 %>% 
  summarise(sample = "all",
            statistic = "standard deviation",
            population = sd(population, na.rm=TRUE),
            pct_white = sd(pctwhite, na.rm=TRUE),
            unemp = sd(unemp, na.rm=TRUE),
            med_hh_inc = sd(medhhinc, na.rm=TRUE),
            home_own = sd(homeown, na.rm=TRUE),
            med_house_val = sd(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = population:med_house_val, names_to = "variable", values_to = "value")


means_candidates_sample <- 
  city_census_data_2000 %>% 
  filter(candidates_sample == TRUE) %>% 
  summarise(sample = "candidates",
            statistic = "mean",
            count = n(),
            population = mean(population, na.rm=TRUE),
            pct_white = mean(pctwhite, na.rm=TRUE),
            unemp = mean(unemp, na.rm=TRUE),
            med_hh_inc = mean(medhhinc, na.rm=TRUE),
            home_own = mean(homeown, na.rm=TRUE),
            med_house_val = mean(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = count:med_house_val, names_to = "variable", values_to = "value")



sds_candidates_sample <- 
  city_census_data_2000 %>% 
  filter(candidates_sample == TRUE) %>% 
  summarise(sample = "candidates",
            statistic = "standard deviation",
            population = sd(population, na.rm=TRUE),
            pct_white = sd(pctwhite, na.rm=TRUE),
            unemp = sd(unemp, na.rm=TRUE),
            med_hh_inc = sd(medhhinc, na.rm=TRUE),
            home_own = sd(homeown, na.rm=TRUE),
            med_house_val = sd(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = population:med_house_val, names_to = "variable", values_to = "value")



means_RD_sample <- 
  city_census_data_2000 %>% 
  filter(RD_sample == TRUE) %>% 
  summarise(sample = "RD",
            statistic = "mean",
            count = n(),
            population = mean(population, na.rm=TRUE),
            pct_white = mean(pctwhite, na.rm=TRUE),
            unemp = mean(unemp, na.rm=TRUE),
            med_hh_inc = mean(medhhinc, na.rm=TRUE),
            home_own = mean(homeown, na.rm=TRUE),
            med_house_val = mean(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = count:med_house_val, names_to = "variable", values_to = "value")



sds_RD_sample <- 
  city_census_data_2000 %>% 
  filter(RD_sample == TRUE) %>% 
  summarise(sample = "RD",
            statistic = "standard deviation",
            population = sd(population, na.rm=TRUE),
            pct_white = sd(pctwhite, na.rm=TRUE),
            unemp = sd(unemp, na.rm=TRUE),
            med_hh_inc = sd(medhhinc, na.rm=TRUE),
            home_own = sd(homeown, na.rm=TRUE),
            med_house_val = sd(medhouseval, na.rm=TRUE)) %>% 
  pivot_longer(cols = population:med_house_val, names_to = "variable", values_to = "value")


summary_stats_long <-
  bind_rows(means_all, sds_all, 
            means_candidates_sample, sds_candidates_sample, 
            means_RD_sample, sds_RD_sample) %>% 
  mutate(statistic = ifelse(variable == "count", "count", statistic)) %>% 
  pivot_wider(names_from = sample, values_from = value) %>% 
  mutate(statistic = factor(statistic, levels = c("count", "mean", "standard deviation")),
         variable = factor(variable, levels = c("count", "population", "pct_white", "unemp", "med_hh_inc", "home_own", "med_house_val"))) %>% 
  arrange(variable, statistic)



summary_stats_cities <-
  bind_rows(means_all, sds_all, 
            means_candidates_sample, sds_candidates_sample, 
            means_RD_sample, sds_RD_sample) %>% 
  mutate(statistic = ifelse(variable == "count", "count", statistic),
         value = case_when(variable == "population" & statistic == "mean" ~ comma(value), 
                           (variable == "unemp" | variable == "pct_white" | variable == "home_own") & statistic == "mean" ~ paste0(format_num(value, digits = 2), "%"),
                           (variable == "med_hh_inc" | variable == "med_house_val") & statistic == "mean" ~ paste0("$", comma(value)),
                           statistic == "standard deviation" ~ paste0("(", format_num(value, digits = 2), ")"),
                           TRUE ~ as.character(value))) %>% 
  pivot_wider(names_from = sample, values_from = value) %>% 
  mutate(variable_label = case_when(statistic == "standard deviation" ~ "    ", 
                                    variable == "count" ~ "Count",
                                    variable == "population" ~ "Population",
                                    variable == "pct_white" ~ "White population",
                                    variable == "unemp" ~ "Unemployment rate",
                                    variable == "med_hh_inc" ~ "Median household income",
                                    variable == "home_own" ~ "Home ownership rate",
                                    variable == "med_house_val" ~ "Median house value"),
         statistic = factor(statistic, levels = c("count", "mean", "standard deviation")),
         variable = factor(variable, levels = c("count", "population", "pct_white", "unemp", "med_hh_inc", "home_own", "med_house_val"))) %>% 
  arrange(variable, statistic)




### mayors' occupations over time (Figure 1)
gg_occupation_year4_mayors <- 
  candidates %>% 
  filter(win == 1) %>%
  group_by(year4, occupation_cat) %>% 
  summarise(n_occupation = n()) %>% 
  ungroup() %>%
  group_by(year4) %>% 
  mutate(n_year4 = sum(n_occupation),
         occupation_share = n_occupation/n_year4) %>% 
  filter(occupation_cat %in% c("business owner/executive", "attorney", "public employee")) %>% 
  mutate(occupation_cat = str_to_title(occupation_cat),
         occupation_cat = factor(occupation_cat, levels = c("Business Owner/Executive", "Attorney", "Public Employee")))


#### Descriptive statistics - candidates' race, gender, & political experience (Table 2)

tab_race <-
  candidates %>%
  group_by(race) %>%
  summarise(n = n(),
            n_mayor = sum(win, na.rm=TRUE),
            win_rate = mean(win, na.rm=TRUE),
            prop_mayor = n_mayor/sum(candidates$win, na.rm=TRUE),
            n_runnerup = sum(win == 0, na.rm=TRUE),
            lose_rate = mean(win == 0, na.rm=TRUE),
            prop_runnerup = n_runnerup/sum(candidates$win == 0, na.rm=TRUE)) %>%
  mutate(race = factor(race, levels = c("white", "black", "hispanic", "asian"),
                       labels = c("White", "Black", "Latino", "Asian")),
         pct_mayor = paste0(format_num(prop_mayor*100, digits = 1), "%"),
         pct_runnerup = paste0(format_num(prop_runnerup*100, digits = 1), "%")) %>%
  arrange(-prop_mayor)


tab_gender <-
  candidates %>%
  group_by(gender) %>%
  summarise(n = n(),
            n_mayor = sum(win, na.rm=TRUE),
            win_rate = mean(win, na.rm=TRUE),
            prop_mayor = n_mayor/sum(candidates$win, na.rm=TRUE),
            n_runnerup = sum(win == 0, na.rm=TRUE),
            lose_rate = mean(win == 0, na.rm=TRUE),
            prop_runnerup = n_runnerup/sum(candidates$win == 0, na.rm=TRUE)) %>%
  mutate(gender = factor(gender, levels = c("0", "1"),
                         labels = c("Men", "Women")),
         pct_mayor = paste0(format_num(prop_mayor*100, digits = 1), "%"),
         pct_runnerup = paste0(format_num(prop_runnerup*100, digits = 1), "%")) %>%
  arrange(-prop_mayor)


#### Political Experience

tab.experience <- with(candidates, table(no_experience, win))[, 2:1]
tab.council <- with(candidates, table(council, win))[, 2:1]
tab.county <- with(candidates, table(county_elect, win))[, 2:1]
tab.state <- with(candidates, table(state_elect, win))[, 2:1]
tab.us <- with(candidates, table(us_elect, win))[, 2:1]
tab.mayor <- with(candidates, table(mayor, win))[, 2:1]
tab.incumbent <- with(candidates, table(incumbent, win))[, 2:1]

none <- c(format_num(tab.experience[2,1], digits=0), format_num(prop.table(tab.experience, 2)[2,1]), 
          format_num(tab.experience[2,2], digits=0), format_num(prop.table(tab.experience, 2)[2,2]))

council <- c(format_num(tab.council[2,1], digits=0), format_num(prop.table(tab.council, 2)[2,1]), 
             format_num(tab.council[2,2], digits=0), format_num(prop.table(tab.council, 2)[2,2]))

county <- c(format_num(tab.county[2,1], digits=0), format_num(prop.table(tab.county, 2)[2,1]), 
            format_num(tab.county[2,2], digits=0), format_num(prop.table(tab.county, 2)[2,2]))

state <- c(format_num(tab.state[2,1], digits=0), format_num(prop.table(tab.state, 2)[2,1]), 
           format_num(tab.state[2,2], digits=0), format_num(prop.table(tab.state, 2)[2,2]))

us <- c(format_num(tab.us[2,1], digits=0), format_num(prop.table(tab.us, 2)[2,1]), 
        format_num(tab.us[2,2], digits=0), format_num(prop.table(tab.us, 2)[2,2]))

mayor <- c(format_num(tab.mayor[2,1], digits=0), format_num(prop.table(tab.mayor, 2)[2,1]), 
           format_num(tab.mayor[2,2], digits=0), format_num(prop.table(tab.mayor, 2)[2,2]))

incumbent <- c(format_num(tab.incumbent[2,1], digits=0), format_num(prop.table(tab.incumbent, 2)[2,1]), 
               format_num(tab.incumbent[2,2], digits=0), format_num(prop.table(tab.incumbent, 2)[2,2]))


tab_experience <- rbind.data.frame(as.numeric(as.character(none)),
                                   as.numeric(as.character(council)), 
                                   as.numeric(as.character(mayor)), 
                                   as.numeric(as.character(incumbent)), 
                                   as.numeric(as.character(state)), 
                                   as.numeric(as.character(county)), 
                                   as.numeric(as.character(us)))
names(tab_experience) <- c("n_mayor", "prop_mayor", "n_runnerup", "prop_runnerup")
tab_experience$experience <- factor(c("No Experience", "City Council", "Mayor", "Incumbent", "State Legislator", 
                                      "County Legislator", "US Legislator"))
tab_experience <- mutate(tab_experience,
                         n = n_mayor + n_runnerup,
                         win_rate = n_mayor/n,
                         lose_rate = n_runnerup/n,
                         pct_mayor = paste0(format_num(prop_mayor*100, digits = 1), "%"),
                         pct_runnerup = paste0(format_num(prop_runnerup*100, digits = 1), "%"))


#### candidates' occupational backgrounds (Table 3)

tab_occupation <-
  candidates %>%
  group_by(occupation_cat) %>% 
  summarise(n = n(),
            n_mayor = sum(win, na.rm=TRUE),
            win_rate = mean(win, na.rm=TRUE),
            prop_mayor = n_mayor/sum(candidates$win, na.rm=TRUE),
            n_runnerup = sum(win == 0, na.rm=TRUE),
            lose_rate = mean(win == 0, na.rm=TRUE),
            prop_runnerup = n_runnerup/sum(candidates$win, na.rm=TRUE)) %>% 
  mutate(occupation_cat = factor(occupation_cat,
                                 levels = c("business owner/executive", "attorney",
                                            "public employee", "manager/supervisor", "sales", 
                                            "educator", "administrator", "other professional", 
                                            "healthcare professional", "homemaker", "other"),
                                 labels = c("Business owner/executive", "Attorney",
                                            "Public employee", "Manager/supervisor", "Sales", 
                                            "Educator", "Administrator", "Other professional", 
                                            "Healthcare professional", "Homemaker", "Other occupations")),
         pct_mayor = paste0(format_num(prop_mayor*100, digits = 1), "%"),
         pct_runnerup = paste0(format_num(prop_runnerup*100, digits = 1), "%")) %>%
  arrange(occupation_cat)  

